# Mount Google Drive so the RSNA dataset and saved outputs live on the user's Drive.
from google.colab import drive # import drive from google colab
ROOT = "/content/drive/" # default location for the drive
print(ROOT) # print content of ROOT (Optional)
drive.mount(ROOT) # we mount the google drive at /content/drive
#%cd 'drive'
# Show the working directory and its contents to confirm the mount (IPython magic / shell escape).
%pwd
!ls
# Build absolute paths to the project, the training images and the save folder on Drive.
from os.path import join
import os

ROOT = "/content/drive/"
# Location of the project folder inside Google Drive.
MY_GOOGLE_DRIVE_PATH = 'My Drive/Colab/CapstoneProject/'
train_folder = 'Data/stage_2_train_images/'
saved_folder = 'Saved_Data/'

PROJECT_PATH = join(ROOT, MY_GOOGLE_DRIVE_PATH)
TRAIN_PATH = join(PROJECT_PATH, train_folder)
SAVE_PATH = join(PROJECT_PATH, saved_folder)

# Echo the derived paths so a wrong mount point is caught early.
print("PROJECT_PATH: ", PROJECT_PATH)
print("TRAIN_PATH: ", TRAIN_PATH)
print("SAVE_PATH: ", SAVE_PATH)
# Kaggle Download: reinstall a pinned kaggle CLI (1.5.6) so the competition download API is stable.
if(True):
!pip uninstall -y kaggle
!pip install --upgrade pip
!pip install kaggle==1.5.6
!kaggle -v
#List Kaggle DataSets
#!kaggle datasets list
#List Kaggle Competitions
#!kaggle competitions list
#List Competitions with string
!kaggle competitions list -s pneumonia
#KAGGLE_DIR = '/content/drive/My Drive/Colab/Kaggle/'
# Download to local Colab storage (faster I/O than Drive).
KAGGLE_DIR = '/content/sample_data/Kaggle/'
%cd {KAGGLE_DIR}
!ls
competition_name = 'rsna-pneumonia-detection-challenge'
!mkdir {competition_name}
DOWNLOAD_DIR = KAGGLE_DIR+competition_name+'/'
print(DOWNLOAD_DIR)
import os
# The kaggle CLI reads the kaggle.json credentials from KAGGLE_CONFIG_DIR.
os.environ['KAGGLE_CONFIG_DIR'] = KAGGLE_DIR
!kaggle competitions download -c {competition_name}
print('Download Complete')
# One-time unzip of the downloaded archive into the project's Data folder.
zip_file = '/content/drive/My Drive/Colab/CapstoneProject/Data/rsna-pneumonia-detection-challenge.zip'
unzip_folder = '/content/drive/My Drive/Colab/CapstoneProject/Data/'
if(False): # Have set it to false to avoid it from running again
!unzip {zip_file} -d {unzip_folder}
print('unzipping complete')
# Move into the project's Data folder on Drive.
%cd {PROJECT_PATH}
%cd 'Data'
import pandas as pd
# Class label per patient (Normal / No Lung Opacity-Not Normal / Lung Opacity).
class_info = pd.read_csv('stage_2_detailed_class_info.csv')
print(class_info.shape)
print(class_info.head())
# Bounding-box labels per patient: x, y, width, height, Target.
train_labels = pd.read_csv('stage_2_train_labels.csv')
print(train_labels.shape)
print(train_labels.head())
# Total 30227 observations
#Data Cleanup
# Data-cleanup helper (reconstructed indentation: the notebook export flattened the body).
def check_data(data_file):
    """Print the per-column NA counts and the number of unique patients.

    data_file: DataFrame that contains a 'patientId' column.
    """
    print('\nIs NA:\n', data_file.isna().sum())
    print('\nUnique Patients:\n', len(data_file['patientId'].unique()))
# A lot of box labels are missing; only Target == 1 rows carry box coordinates.
check_data(class_info)
check_data(train_labels)
# total ~26k unique patients are present
# 20672 entries do not have x,y in it
# Merge the two CSVs into a single frame keyed on patientId.
class_info_train_labels_merge = train_labels.merge(class_info, left_on='patientId', right_on='patientId', how='inner')
class_info_train_labels_merge.head()
check_data(class_info_train_labels_merge)
# 20672 rows do not have X and Y info.
print(class_info_train_labels_merge[class_info_train_labels_merge['Target'] == 0].isna().sum())
# For Target == 1 we have values in all the columns.
print(class_info_train_labels_merge[class_info_train_labels_merge['class'] == 'Normal'].isna().sum())
# 8851 missing values for the Normal class
print(class_info_train_labels_merge[class_info_train_labels_merge['class'] == 'No Lung Opacity / Not Normal'].isna().sum())
# 11821 missing values; the total of 20672 is maintained
print(class_info_train_labels_merge[class_info_train_labels_merge['class'] == 'Lung Opacity'].isna().sum())
# no missing values
class_info_train_labels_merge.groupby('class')['Target'].unique()
# All Lung Opacity rows have Target=1 and no other class has Target=1,
# so every pneumonia case is a Lung Opacity case.
# Check the Target distribution.
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style("dark")
# NOTE(review): positional data arg to countplot is deprecated in newer seaborn — confirm version.
sns.countplot(train_labels['Target'])
# Set the working directory to the training DICOM image folder.
%cd {PROJECT_PATH}
%cd 'Data/stage_2_train_images'
# Load the dependency used to read DICOM files.
!pip install pydicom
import pydicom
import matplotlib.pyplot as plt
# Function to load a single image from a DICOM file
# (reconstructed indentation: the notebook export flattened the body).
def load_image(imagename):
    """Read DICOM file *imagename* and return the pydicom dataset (pixels in .pixel_array)."""
    image1 = pydicom.dcmread(imagename)
    print(type(image1))  # kept from the original: confirms the dataset type when loading
    return image1
# Check one sample image from each class.
fig, axes = plt.subplots(nrows=1, ncols=3 ,figsize=(20,15))
axes[0].set_title('No Lung Opacity')
axes[0].imshow(load_image('00322d4d-1c29-4943-afc9-b6754be640eb.dcm').pixel_array, cmap=plt.cm.bone)
axes[1].set_title('Normal')
axes[1].imshow(load_image('003d8fa0-6bf1-40ed-b54c-ac657f8495c5.dcm').pixel_array, cmap=plt.cm.bone)
axes[2].set_title('Lung Opacity')
axes[2].imshow(load_image('00436515-870c-4b36-a041-de91049b9ab4.dcm').pixel_array, cmap=plt.cm.bone)
image1 = pydicom.dcmread('00436515-870c-4b36-a041-de91049b9ab4.dcm').pixel_array
image1.shape
# Images are 1024x1024, which would be too large for our network;
# the dimension is reduced during model preparation to speed up training.
# Check the header data within a sample file.
dcm_data = pydicom.read_file('00322d4d-1c29-4943-afc9-b6754be640eb.dcm')
print(dcm_data)
# Fetch DICOM header metadata from the image files and merge it into the labels
# dataframe. The result is cached as a CSV so the slow per-file scan runs once.
# (Reconstructed indentation: the notebook export flattened the bodies.)
# NOTE: 'vars' shadows the Python builtin; the name is kept for backward compatibility.
vars = ['Modality', 'PatientAge', 'PatientSex', 'BodyPartExamined', 'ViewPosition', 'ConversionType', 'Rows', 'Columns', 'PixelSpacing']

def get_DICOM_metadata(class_info):
    """Return *class_info* augmented with per-patient DICOM header fields.

    Loads the cached metadata CSV when present; otherwise scans every training
    DICOM, copies the header attributes listed in ``vars`` onto the matching
    patient rows, and writes the cache. Also derives the box-centre columns
    'xc' and 'yc'.
    """
    cache_file = PROJECT_PATH + 'Data/class_info_train_labels_merge_metadata.csv'
    if os.path.isfile(cache_file):
        class_info = pd.read_csv(cache_file)
        # drop the unnamed index column that to_csv wrote
        class_info.drop([class_info.columns[0]], axis=1, inplace=True)
    else:
        # get the file lists from the train/test folders
        image_train_path = os.listdir(PROJECT_PATH + 'Data/stage_2_train_images')
        image_test_path = os.listdir(PROJECT_PATH + 'Data/stage_2_test_images')
        print("Number of images in train set:", len(image_train_path), "\nNumber of images in test set:", len(image_test_path))
        # create the (initially empty) metadata columns
        for var in vars:
            class_info[var] = None
        # iterate over the DICOM files and copy header fields onto matching rows
        for i, filename in enumerate(image_train_path):
            dcm_data = pydicom.read_file(PROJECT_PATH + 'Data/stage_2_train_images/' + filename)
            idx = (class_info['patientId'] == dcm_data.PatientID)
            class_info.loc[idx, 'Modality'] = dcm_data.Modality
            class_info.loc[idx, 'PatientAge'] = pd.to_numeric(dcm_data.PatientAge)
            class_info.loc[idx, 'PatientSex'] = dcm_data.PatientSex
            class_info.loc[idx, 'BodyPartExamined'] = dcm_data.BodyPartExamined
            class_info.loc[idx, 'ViewPosition'] = dcm_data.ViewPosition
            class_info.loc[idx, 'ConversionType'] = dcm_data.ConversionType
            class_info.loc[idx, 'Rows'] = dcm_data.Rows
            class_info.loc[idx, 'Columns'] = dcm_data.Columns
            class_info.loc[idx, 'PixelSpacing'] = str.format("{:4.3f}", dcm_data.PixelSpacing[0])
        # cache to CSV so this scan is skipped on later runs
        class_info.to_csv(cache_file)
    # derive rectangle centre coordinates (the cache is written before these exist)
    class_info['xc'] = class_info['x'] + class_info['width'] / 2
    class_info['yc'] = class_info['y'] + class_info['height'] / 2
    return class_info

class_info_train_labels_merge_metadata = get_DICOM_metadata(class_info_train_labels_merge)
class_info_train_labels_merge_metadata.head()
# Check improper values in the dataset
# (reconstructed indentation: the notebook export flattened the body).
def check_data_set(class_info):
    """Print per-column NA counts for *class_info*."""
    print(class_info.isna().sum())
# Report missing values in the raw class-info table.
check_data_set(class_info)
# Draw graphs from the data
# Draw one countplot per requested column, stacked vertically
# (reconstructed indentation: the notebook export flattened the body).
def drawgraphs(data_file, columns, hue=False, width=15, showdistribution=True):
    """Draw a countplot for each column in *columns*.

    data_file: DataFrame to plot from.
    columns: iterable of column names; one subplot per column.
    hue: optional column name to split bars by (False = no hue).
    width: figure width in inches; height scales with the number of columns.
    showdistribution: annotate each bar with its percentage of all rows.
    Returns True.
    """
    print('Creating graph for X axis:', " and Y axis:", columns)
    length = len(columns) * 6
    total = float(len(data_file))
    fig, axes = plt.subplots(nrows=len(columns) if len(columns) > 1 else 1, ncols=1, figsize=(width, length))
    for index, content in enumerate(columns):
        plt.title(content)
        # plt.subplots returns a single Axes (not an array) when nrows == 1
        currentaxes = axes[index] if len(columns) > 1 else axes
        if hue:
            sns.countplot(x=columns[index], data=data_file, ax=currentaxes, hue=hue)
        else:
            sns.countplot(x=columns[index], data=data_file, ax=currentaxes)
        if showdistribution:
            # annotate each bar with its share of the full dataset
            for p in currentaxes.patches:
                height = p.get_height()
                if height > 0 and total > 0:
                    currentaxes.text(p.get_x() + p.get_width() / 2., height + 3, '{:1.2f}%'.format(100 * height / total), ha="center")
    return True
# Plot the metadata columns split by class.
drawgraphs(data_file= class_info_train_labels_merge_metadata,columns= class_info_train_labels_merge_metadata.columns[5:16],hue= 'class')
# Inference:
# - All Lung Opacity rows are in Target=1 only
# - Patient age needs to be bucketized to get a clearer picture of its distribution
# - A higher percentage of males have this problem
# - View position AP has higher lung opacity than PA, and the % of Normal is also lower in AP
# - Conversion type, rows, columns, body part examined and modality have only 1 value, so they are not useful
# - Pixel spacing 0.168 has higher lung opacity
drawgraphs(data_file= class_info_train_labels_merge_metadata, columns= ['PatientAge'], width =20, showdistribution=True)
import numpy as np
# Bucket ages into 16 bins of 10 years spanning 0-160.
custom_bucket_array = np.linspace(0, 160, 17)
custom_bucket_array
class_info_train_labels_merge_metadata['PatientAgeBucket'] =pd.cut(class_info_train_labels_merge_metadata['PatientAge'], custom_bucket_array)
class_info_train_labels_merge_metadata.head(1)
drawgraphs(data_file= class_info_train_labels_merge_metadata, columns= ['PatientAgeBucket'], width =20, showdistribution=True, hue='PatientSex')
# There are more patients in the 50-60 age group, so the chance of a positive patient in this group is higher.
drawgraphs(data_file= class_info_train_labels_merge_metadata, columns= ['PatientSex'], width =10, showdistribution=True, hue='Target')
# There are more records of males than females, for both Target 0 and 1.
# Rectangle centre coordinates. NOTE(review): also computed inside get_DICOM_metadata — harmless recompute.
class_info_train_labels_merge_metadata['xc'] = class_info_train_labels_merge_metadata['x'] + class_info_train_labels_merge_metadata['width'] / 2
class_info_train_labels_merge_metadata['yc'] = class_info_train_labels_merge_metadata['y'] + class_info_train_labels_merge_metadata['height'] / 2
class_info_train_labels_merge_metadata.head(1)
from matplotlib.patches import Rectangle

# Scatter all box centres and overlay the (near-transparent) boxes themselves
# (reconstructed indentation: the notebook export flattened the body).
def plot_window(data, color_point, color_window, text):
    """Plot box centres ('xc','yc') as points and each box as a translucent rectangle.

    data: rows with 'x', 'y', 'width', 'height', 'xc', 'yc' columns.
    color_point / color_window: colours for the centre markers and rectangles.
    text: extra line appended to the plot title.
    """
    fig, ax = plt.subplots(1, 1, figsize=(7, 7))
    plt.title("Centers of Lung Opacity rectangles over rectangles\n{}".format(text))
    data.plot.scatter(x='xc', y='yc', xlim=(0, 1024), ylim=(0, 1024), ax=ax, alpha=0.8, marker=".", color=color_point)
    for i, crt_sample in data.iterrows():
        # alpha is tiny so only dense regions build up a visible colour
        ax.add_patch(Rectangle(xy=(crt_sample['x'], crt_sample['y']),
                               width=crt_sample['width'], height=crt_sample['height'],
                               alpha=3.5e-3, color=color_window))
    plt.show()
# AP view. FIX: the filter selects ViewPosition == 'AP' but the original title said 'PA'.
classify = (class_info_train_labels_merge_metadata['ViewPosition']=='AP')
plot_window(class_info_train_labels_merge_metadata[ classify ],'green', 'yellow', 'Patient View Position: AP')
# Lung Opacities are present mostly in the central part.
classify = (class_info_train_labels_merge_metadata['ViewPosition']=='PA')
plot_window(class_info_train_labels_merge_metadata[ classify ],'blue', 'red', 'Patient View Position: PA')
# Distribution is slightly different for view position PA.
classify = (class_info_train_labels_merge_metadata['ViewPosition']=='PA') & (class_info_train_labels_merge_metadata['PatientAgeBucket']==pd.Interval(50,60))
plot_window(class_info_train_labels_merge_metadata[ classify ],'blue', 'red', 'Patient View Position: PA')
# Checking the distribution for the 50-60 age group.
classify = (class_info_train_labels_merge_metadata['ViewPosition']=='PA') & (class_info_train_labels_merge_metadata['PatientAgeBucket']==pd.Interval(60,70))
plot_window(class_info_train_labels_merge_metadata[ classify ],'blue', 'red', 'Patient View Position: PA')
# Show a 3x3 grid of DICOM images with their pneumonia bounding boxes
# (reconstructed indentation: the notebook export flattened the body).
def show_dicom_images_with_boxes(data):
    """Plot 9 patients from *data* with header info in the title and yellow boxes.

    data: 9 rows with 'patientId', 'Target' and 'class' columns; boxes are
    looked up in the global class_info_train_labels_merge_metadata frame.
    """
    img_data = list(data.T.to_dict().values())
    f, ax = plt.subplots(3, 3, figsize=(16, 18))
    for i, data_row in enumerate(img_data):
        patientImage = data_row['patientId'] + '.dcm'
        imagePath = os.path.join(PROJECT_PATH, "Data/stage_2_train_images/", patientImage)
        # FIX: the original read the same file twice (read_file + load_image);
        # one read supplies both the header fields and the pixel data.
        data_row_img_data = pydicom.read_file(imagePath)
        modality = data_row_img_data.Modality
        age = data_row_img_data.PatientAge
        sex = data_row_img_data.PatientSex
        ax[i//3, i%3].imshow(data_row_img_data.pixel_array, cmap=plt.cm.bone)
        ax[i//3, i%3].axis('off')
        ax[i//3, i%3].set_title('ID: {}\nModality: {} Age: {} Sex: {} Target: {}\nClass: {}'.format(
            data_row['patientId'], modality, age, sex, data_row['Target'], data_row['class']))
        # overlay every box recorded for this patient
        rows = class_info_train_labels_merge_metadata[class_info_train_labels_merge_metadata['patientId']==data_row['patientId']]
        box_data = list(rows.T.to_dict().values())
        for j, row in enumerate(box_data):
            ax[i//3, i%3].add_patch(Rectangle(xy=(row['x'], row['y']),
                                              width=row['width'], height=row['height'],
                                              color="yellow", alpha=0.1))
    plt.show()
# Show 9 random negative and 9 random positive samples with their boxes.
show_dicom_images_with_boxes(class_info_train_labels_merge_metadata[class_info_train_labels_merge_metadata['Target']==0].sample(9))
show_dicom_images_with_boxes(class_info_train_labels_merge_metadata[class_info_train_labels_merge_metadata['Target']==1].sample(9))
import os
import csv
import random
!pip install pydicom
import pydicom
import numpy as np
import pandas as pd
from skimage import io
from skimage import measure
from skimage.transform import resize
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
import matplotlib.patches as patches
# Copying from Drive to local Colab storage was aborted (too slow); images are read from Drive.
# !cp -r -v '/content/drive/My Drive/Colab/CapstoneProject/Data/stage_2_train_images/' '/content/sample_data/'
#TRAIN_PATH = '/content/sample_data/stage_2_train_images/'
print(TRAIN_PATH)
filenames = {}
# The directory listing is cached in a CSV: set read_directory=True to refresh it.
# (Reconstructed indentation: the notebook export flattened the if/else bodies.)
read_directory = False
if read_directory:
    filenames = os.listdir(TRAIN_PATH)
    pd.DataFrame(filenames).to_csv(SAVE_PATH+'train_path_listdir.csv')
else:
    filenames = pd.read_csv(SAVE_PATH+'train_path_listdir.csv', usecols=[1], header=0).values.tolist()
    # flatten the list of single-element lists produced by .values.tolist()
    filenames = [val for sublist in filenames for val in sublist]
# Use part of the data for early experiments, then 100% for the final run.
percentage_data_used = 100
file_count = int(len(filenames)*percentage_data_used/100)
print("Total files available:",file_count)
random.shuffle(filenames)
# 70/30 split into train and validation filenames
n_valid_samples = int(file_count * 0.3)
train_filenames = filenames[n_valid_samples:file_count]
valid_filenames = filenames[:n_valid_samples]
print('n train samples', len(train_filenames))
print('n valid samples', len(valid_filenames))
n_train_samples = len(filenames) - n_valid_samples
image_dimension = 128
print('Image Dimension to use:',image_dimension)
print('sample file:',filenames[0])
# Check the class distribution of a list of image filenames against the class-info CSV.
# Stratified sampling was not used because the split barely changes the distribution.
def check_distribution(dataframe_to_check):
    """Print row count, unique-patient count and the normalised class distribution.

    dataframe_to_check: iterable of '<patientId>.dcm' filenames; rows are looked
    up in the global class_info frame.
    """
    # FIX: DataFrame.append was removed in pandas 2.0 — collect matches and concat once.
    matches = [class_info[class_info['patientId'] == filename.split('.')[0]]
               for filename in dataframe_to_check]
    filename_check = pd.concat(matches) if matches else pd.DataFrame(columns=['patientId', 'class'])
    print('Rows', len(filename_check))
    print('unique', len(filename_check['patientId'].unique()))
    print(filename_check['class'].value_counts(normalize = True))
check_distribution(train_filenames)
check_distribution(valid_filenames)
check_distribution(filenames)
# Overall distribution is very similar between the full, train and validation sets.
# Identify any files other than .dcm in the training folder
# (reconstructed indentation: the notebook export flattened the loop body).
for name in filenames:
    # print anything whose name does not end in 'dcm'
    if not name.endswith('dcm'):
        print(name)
# Build a dict mapping patientId -> list of pneumonia boxes [x, y, w, h]
# from the training-labels CSV. Only Target == 1 rows carry boxes.
# (Reconstructed indentation: the notebook export flattened the bodies.)
pneumonia_locations = {}
with open(os.path.join(PROJECT_PATH, 'Data/stage_2_train_labels.csv'), mode='r') as infile:
    reader = csv.reader(infile)
    # skip the header row
    next(reader, None)
    for rows in reader:
        # columns: patientId, x, y, width, height, Target
        filename = rows[0]
        location = rows[1:5]
        pneumonia = rows[5]
        if pneumonia == '1':
            # coordinates come as float strings (e.g. '264.0'); convert to int
            location = [int(float(i)) for i in location]
            # a patient can have several boxes — accumulate them per filename
            pneumonia_locations.setdefault(filename, []).append(location)
import keras
# The dataset is too large to fit into memory, so we need to create a generator that loads data on the fly.
# Generator class to handle:
# Image load from folder during train and predict modes, shuffle on epoc end,
# resize loaded images, augment if needed, add trailing channel dimension
class generator(keras.utils.Sequence):
    """Keras Sequence that streams DICOM images (and masks) in batches.

    Handles loading from *folder*, optional horizontal-flip augmentation,
    resizing to *image_size*, shuffling at epoch end, and a predict mode
    that yields (images, filenames) instead of (images, masks).
    (Reconstructed indentation: the notebook export flattened the bodies.)
    """

    def __init__(self, folder, filenames, pneumonia_locations=None, batch_size=32,
                 image_size=image_dimension, shuffle=True, augment=False, predict=False):
        self.folder = folder
        self.filenames = filenames
        self.pneumonia_locations = pneumonia_locations
        self.batch_size = batch_size
        self.image_size = image_size
        self.shuffle = shuffle
        self.augment = augment
        self.predict = predict
        self.on_epoch_end()

    def __load__(self, filename):
        """Load one training image and build its pneumonia mask; return (img, msk)."""
        # load dicom file as numpy array
        img = pydicom.dcmread(os.path.join(self.folder, filename), force=True).pixel_array
        # start from an empty mask of the same size
        msk = np.zeros(img.shape)
        # filename without extension is the patient id
        filename = filename.split('.')[0]
        # fill the mask with 1s over every pneumonia box for this patient.
        # FIX: guard against pneumonia_locations=None (the constructor default),
        # which previously raised TypeError on the `in` test.
        if self.pneumonia_locations and filename in self.pneumonia_locations:
            for location in self.pneumonia_locations[filename]:
                x, y, w, h = location
                msk[y:y+h, x:x+w] = 1
        # resize both image and mask; re-binarise the mask after interpolation
        img = resize(img, (self.image_size, self.image_size), mode='reflect')
        msk = resize(msk, (self.image_size, self.image_size), mode='reflect') > 0.5
        # horizontal flip half the time when augmenting
        if self.augment and random.random() > 0.5:
            img = np.fliplr(img)
            msk = np.fliplr(msk)
        # add trailing channel dimension
        img = np.expand_dims(img, -1)
        msk = np.expand_dims(msk, -1)
        return img, msk

    def __loadpredict__(self, filename):
        """Load and resize one image for prediction (no mask)."""
        img = pydicom.dcmread(os.path.join(self.folder, filename), force=True).pixel_array
        img = resize(img, (self.image_size, self.image_size), mode='reflect')
        img = np.expand_dims(img, -1)
        return img

    def __getitem__(self, index):
        """Return batch *index*: (imgs, filenames) in predict mode, else (imgs, msks)."""
        filenames = self.filenames[index*self.batch_size:(index+1)*self.batch_size]
        if self.predict:
            imgs = [self.__loadpredict__(filename) for filename in filenames]
            return np.array(imgs), filenames
        else:
            items = [self.__load__(filename) for filename in filenames]
            imgs, msks = zip(*items)
            return np.array(imgs), np.array(msks)

    def on_epoch_end(self):
        # reshuffle between epochs so batches differ each epoch
        if self.shuffle:
            random.shuffle(self.filenames)

    def __len__(self):
        if self.predict:
            # include the final partial batch when predicting
            return int(np.ceil(len(self.filenames) / self.batch_size))
        else:
            # full batches only during training
            return int(len(self.filenames) / self.batch_size)
# One downsample unit: BN -> LeakyReLU -> 1x1 conv -> 2x2 max-pool
# (reconstructed indentation: the notebook export flattened the body).
def create_downsample(channels, inputs):
    """Return *inputs* downsampled 2x with *channels* output channels."""
    x = keras.layers.BatchNormalization(momentum=0.9)(inputs)
    x = keras.layers.LeakyReLU(0)(x)
    x = keras.layers.Conv2D(channels, 1, padding='same', use_bias=False)(x)
    x = keras.layers.MaxPool2D(2)(x)
    return x
# One residual unit: two (BN -> LeakyReLU -> 3x3 conv) stages plus identity skip
# (reconstructed indentation: the notebook export flattened the body).
def create_resblock(channels, inputs):
    """Return *inputs* through a residual block; *channels* must match the input channels."""
    x = keras.layers.BatchNormalization(momentum=0.9)(inputs)
    x = keras.layers.LeakyReLU(0)(x)
    x = keras.layers.Conv2D(channels, 3, padding='same', use_bias=False)(x)
    x = keras.layers.BatchNormalization(momentum=0.9)(x)
    x = keras.layers.LeakyReLU(0)(x)
    x = keras.layers.Conv2D(channels, 3, padding='same', use_bias=False)(x)
    return keras.layers.add([x, inputs])
# Segmentation model: conv stem, `depth` stages of (downsample + n_blocks resblocks),
# then a 1x1 sigmoid conv upsampled back to the input resolution
# (reconstructed indentation: the notebook export flattened the body).
def create_network(input_size, channels, n_blocks=2, depth=4):
    """Build and return the keras segmentation model.

    input_size: height/width of the square single-channel input.
    channels: stem channel count; doubled at every downsampling stage.
    n_blocks: residual blocks per stage.
    depth: number of downsampling stages (output is upsampled by 2**depth).
    """
    inputs = keras.Input(shape=(input_size, input_size, 1))
    x = keras.layers.Conv2D(channels, 3, padding='same', use_bias=False)(inputs)
    for d in range(depth):
        channels = channels * 2
        x = create_downsample(channels, x)
        for b in range(n_blocks):
            x = create_resblock(channels, x)
    # output head: BN -> LeakyReLU -> 1x1 sigmoid conv -> upsample to input size
    x = keras.layers.BatchNormalization(momentum=0.9)(x)
    x = keras.layers.LeakyReLU(0)(x)
    x = keras.layers.Conv2D(1, 1, activation='sigmoid')(x)
    outputs = keras.layers.UpSampling2D(2**depth)(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model
# IoU (Jaccard) loss over the flattened masks, smoothed by +1 to avoid 0/0
# (reconstructed indentation: the notebook export flattened the body).
def iou_loss(y_true, y_pred):
    """Return 1 - soft IoU between *y_true* and *y_pred*."""
    y_true = tf.reshape(y_true, [-1])
    y_pred = tf.reshape(y_pred, [-1])
    intersection = tf.reduce_sum(y_true * y_pred)
    score = (intersection + 1.) / (tf.reduce_sum(y_true) + tf.reduce_sum(y_pred) - intersection + 1.)
    return 1 - score
# Equal-weight combination of binary cross-entropy and IoU loss
# (reconstructed indentation: the notebook export flattened the body).
def iou_bce_loss(y_true, y_pred):
    """Return 0.5 * BCE + 0.5 * IoU-loss between *y_true* and *y_pred*."""
    return 0.5 * keras.losses.binary_crossentropy(y_true, y_pred) + 0.5 * iou_loss(y_true, y_pred)
# Mean IoU metric over the batch, computed on rounded (hard) predictions
# (reconstructed indentation: the notebook export flattened the body).
def mean_iou(y_true, y_pred):
    """Return the batch-mean IoU between *y_true* and rounded *y_pred* (smoothed by +1)."""
    y_pred = tf.round(y_pred)
    intersect = tf.reduce_sum(y_true * y_pred, axis=[1, 2, 3])
    union = tf.reduce_sum(y_true, axis=[1, 2, 3]) + tf.reduce_sum(y_pred, axis=[1, 2, 3])
    smooth = tf.ones(tf.shape(intersect))
    return tf.reduce_mean((intersect + smooth) / (union - intersect + smooth))
# Create the network and compile it with the combined BCE+IoU loss.
model = create_network(input_size=image_dimension, channels=32, n_blocks=2, depth=4)
model.compile(optimizer='adam',
loss=iou_bce_loss,
metrics=['accuracy', mean_iou])
model.summary()
# Cosine learning-rate annealing: decays the LR from 0.001 to 0 over 25 epochs
# following half a cosine period
# (reconstructed indentation: the notebook export flattened the body).
def cosine_annealing(x):
    """Return the learning rate for epoch *x* (cosine decay, max 0.001, period 25 epochs)."""
    lr = 0.001
    epochs = 25
    return lr * (np.cos(np.pi * x / epochs) + 1.) / 2
learning_rate = tf.keras.callbacks.LearningRateScheduler(cosine_annealing)
# Logs each epoch's metrics to CSV while training runs.
csv_logger = tf.keras.callbacks.CSVLogger(SAVE_PATH + 'logs_cnn_segment.csv', append = True)
# Creating checkpoint of the best model to avoid save errors later on.
# Saves training time once the best model is achieved.
# NOTE(review): 'cp' is never added to the callbacks list passed to fit_generator
# below, so no checkpoint file is actually written — confirm intent.
cp = tf.keras.callbacks.ModelCheckpoint(filepath = SAVE_PATH + 'model_checkpoint.h5', verbose = 1, save_best_only = True)
# Keep monitoring val_loss to see if there is any improvement.
# Mostly the model kept loss in a range, so patience is 4 to avoid bloating training time.
# Any improvement of 0.5% in val_loss would get captured.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0.005, patience=4, restore_best_weights=True, verbose=1, mode='auto')
# Create the train and validation generators.
train_gen = generator(TRAIN_PATH, train_filenames, pneumonia_locations, batch_size=32, image_size=image_dimension, shuffle=True, augment=True, predict=False)
valid_gen = generator(TRAIN_PATH, valid_filenames, pneumonia_locations, batch_size=32, image_size=image_dimension, shuffle=False, predict=False)
# NOTE(review): fit_generator is deprecated in newer TF in favour of model.fit — confirm TF version.
history = model.fit_generator(train_gen, validation_data=valid_gen, callbacks=[learning_rate,csv_logger,early_stopping], epochs=12, workers=4, use_multiprocessing=True)
# Plot the training curves: loss, accuracy and mean IoU for train vs validation.
plt.figure(figsize=(25,6))
plt.subplot(131)
plt.plot(history.epoch, history.history["loss"], label="Train loss")
plt.plot(history.epoch, history.history["val_loss"], label="Valid loss")
plt.legend()
plt.subplot(132)
plt.plot(history.epoch, history.history["accuracy"], label="Train accuracy")
plt.plot(history.epoch, history.history["val_accuracy"], label="Valid accuracy")
plt.legend()
plt.subplot(133)
plt.plot(history.epoch, history.history["mean_iou"], label="Train iou")
plt.plot(history.epoch, history.history["val_mean_iou"], label="Valid iou")
plt.legend()
plt.show()
# Validation loss did not improve by more than min_delta for 4 epochs, so early stopping was hit.
# Validation accuracy peaked between 96-97% and then declined, showing signs of overfitting after epoch #4.
# Red boxes = predicted mask components, blue boxes = ground-truth mask components
# (reconstructed indentation: the notebook export flattened the bodies).
def _draw_boxes(ax, mask, edgecolor):
    """Draw a bounding box on *ax* for every connected component of boolean *mask*."""
    comp = measure.label(mask)
    for region in measure.regionprops(comp):
        # region.bbox is (min_row, min_col, max_row, max_col)
        y, x, y2, x2 = region.bbox
        ax.add_patch(patches.Rectangle((x, y), x2 - x, y2 - y,
                                       linewidth=2, edgecolor=edgecolor, facecolor='none'))

for imgs, msks in valid_gen:
    # predict one batch of validation images
    preds = model.predict(imgs)
    # one subplot per image in the batch
    f, axarr = plt.subplots(4, 8, figsize=(20, 15))
    axarr = axarr.ravel()
    axidx = 0
    for img, msk, pred in zip(imgs, msks, preds):
        axarr[axidx].imshow(img[:, :, 0])
        # threshold both masks at 0.5 and box their connected components:
        # true mask in blue, predicted mask in red
        _draw_boxes(axarr[axidx], msk[:, :, 0] > 0.5, 'b')
        _draw_boxes(axarr[axidx], pred[:, :, 0] > 0.5, 'r')
        axidx += 1
    plt.show()
    # only plot one batch
    break
# There was considerable IoU for the images where the prediction was correct.
1. In our project we started by exploring the given dataset and examining how the various attributes (obtained from both the CSV files and the images) are distributed across the entire dataset. 2. In our project work we implemented three base models: ResNet, MobileNet and Xception. 3. In this notebook we have implemented one base model (ResNet), and the corresponding model accuracy has been captured as shown above. 4. The ResNet model was trained for 6 epochs; since the model accuracy did not improve after 6 epochs, we used early stopping and saved the model.